{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a7921ace-4d57-4a8e-934c-7e49a4f268e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the %pip magic so the package is installed into the running kernel's environment\n",
    "%pip install hdfs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db993abd-02bd-465b-a94a-ad7ed7e0e234",
   "metadata": {},
   "outputs": [],
   "source": [
    "from hdfs import InsecureClient\n",
    "import os\n",
    "\n",
    "# Create a HDFS connector client\n",
    "hdfs_client = InsecureClient(\"http://hive:50070\", user='root')\n",
    "\n",
    "# Reset the demo directory so the notebook can be re-run from a clean state.\n",
    "# status(strict=False) returns None instead of raising when the path is\n",
    "# missing, so this cell also works when the directory does not exist yet.\n",
    "gravitino_root = \"/user/gravitino\"\n",
    "if hdfs_client.status(gravitino_root, strict=False) is None:\n",
    "    print(f\"{gravitino_root} does not exist, nothing to clean up.\")\n",
    "else:\n",
    "    # List HDFS files and directories before removing them\n",
    "    print(hdfs_client.list(gravitino_root))\n",
    "    # recursive=True: a non-recursive delete raises HdfsError on a non-empty directory\n",
    "    print(hdfs_client.delete(gravitino_root, recursive=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fcfb73be-e369-4543-b78d-fb1cd061c9e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the %pip magic so the pinned client is installed into the running kernel's environment\n",
    "%pip install apache-gravitino==0.9.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "384715f6-a200-448f-b3b2-bf03931769bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Dict, List\n",
    "from gravitino import (\n",
    "    NameIdentifier,\n",
    "    GravitinoAdminClient,\n",
    "    GravitinoClient,\n",
    "    Catalog,\n",
    "    Fileset,\n",
    "    FilesetChange,\n",
    ")\n",
    "import os\n",
    "\n",
    "# Admin client pointing at the Gravitino server\n",
    "gravitino_admin_client = GravitinoAdminClient(uri=\"http://gravitino:8090\")\n",
    "\n",
    "# Create the metalake through the admin client and show the returned entity\n",
    "metalake_name = \"default\"\n",
    "metalake = gravitino_admin_client.create_metalake(\n",
    "    name=metalake_name,\n",
    "    comment=\"metalake comment\",\n",
    "    properties={},\n",
    ")\n",
    "print(metalake)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "876c1ca4-7c41-4c9d-acde-faf2fe8c3bd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gravitino client bound to the metalake named by metalake_name\n",
    "gravitino_client = GravitinoClient(\n",
    "    uri=\"http://gravitino:8090\",\n",
    "    metalake_name=metalake_name,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d74be244-fa6a-4065-bed4-ba47bc5cd32e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Dict, List\n",
    "from gravitino import GravitinoMetalake\n",
    "\n",
    "# Ask the admin client for all metalake entities and print them\n",
    "metalake_list: List[GravitinoMetalake] = (\n",
    "    gravitino_admin_client.list_metalakes()\n",
    ")\n",
    "print(metalake_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ed1f174-8b21-4191-a1a4-14718eaa87ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a fileset catalog (\"hadoop\" provider) via the Gravitino client\n",
    "catalog_name = \"catalog\"\n",
    "\n",
    "catalog = gravitino_client.create_catalog(\n",
    "    name=catalog_name,\n",
    "    catalog_type=Catalog.Type.FILESET,\n",
    "    provider=\"hadoop\",\n",
    "    comment=\"\",\n",
    "    properties={},\n",
    ")\n",
    "print(catalog)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adf8c2ec-17e2-4550-a1cd-20430d59520c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the catalog entity by name via the Gravitino client and print it\n",
    "catalog = gravitino_client.load_catalog(\n",
    "    name=catalog_name,\n",
    ")\n",
    "print(catalog)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fc69b06-0c7f-4fff-a8d8-4f257399af9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create schema entity via Gravitino client\n",
    "schema_name=\"schema\"\n",
    "schema_path=\"/user/gravitino/\"+schema_name\n",
    "schema_hdfs_path=f\"hdfs://hive:9000{schema_path}\"\n",
    "\n",
    "catalog.as_schemas().create_schema(schema_name=schema_name, \n",
    "                                   comment=\"\", \n",
    "                                   properties={\"location\":schema_hdfs_path})\n",
    "\n",
    "# Check if the schema location was successfully created in HDFS.\n",
    "# status(strict=False) returns None for a missing path instead of raising,\n",
    "# so real errors (e.g. HDFS unreachable) surface instead of being swallowed.\n",
    "info = hdfs_client.status(schema_path, strict=False)\n",
    "if info is not None:\n",
    "    print(f\"Success: The storage location {schema_path} was successfully created.\")\n",
    "    print(\"Details:\", info)\n",
    "else:\n",
    "    print(f\"Failed: The storage location {schema_path} was not successfully created.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53ae1b3f-53a0-42c3-b9c2-3ab6ed0defd1",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Create a managed type of Fileset\n",
    "managed_fileset_name=\"managed_fileset\"\n",
    "managed_fileset_path=\"/user/gravitino/\"+schema_name+\"/\"+managed_fileset_name\n",
    "managed_fileset_hdfs_path=f\"hdfs://hive:9000{managed_fileset_path}\"\n",
    "\n",
    "managed_fileset_ident: NameIdentifier = NameIdentifier.of(schema_name, managed_fileset_name)\n",
    "catalog.as_fileset_catalog().create_fileset(ident=managed_fileset_ident,\n",
    "                                            fileset_type=Fileset.Type.MANAGED,\n",
    "                                            comment=\"\",\n",
    "                                            storage_location=managed_fileset_hdfs_path,\n",
    "                                            properties={})\n",
    "\n",
    "# Check if the fileset location was successfully created in HDFS.\n",
    "# status(strict=False) returns None for a missing path instead of raising,\n",
    "# so real errors (e.g. HDFS unreachable) surface instead of being swallowed.\n",
    "info = hdfs_client.status(managed_fileset_path, strict=False)\n",
    "if info is not None:\n",
    "    print(f\"Success: The storage location {managed_fileset_path} was successfully created.\")\n",
    "    print(\"Details:\", info)  # print HDFS path detail information\n",
    "else:\n",
    "    print(f\"Failed: The storage location {managed_fileset_path} was not successfully created.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08d7b09f-e732-4c4a-aff4-8f7e0aaa61de",
   "metadata": {},
   "outputs": [],
   "source": [
    "external_fileset_name=\"external_fileset\"\n",
    "external_fileset_path=\"/user/gravitino/\"+schema_name+\"/\"+external_fileset_name\n",
    "external_fileset_hdfs_path=f\"hdfs://hive:9000{external_fileset_path}\"\n",
    "\n",
    "# Create a fileset path in HDFS in advance\n",
    "hdfs_client.makedirs(external_fileset_path)\n",
    "\n",
    "# Verify the directory exists. status(strict=False) returns None for a missing\n",
    "# path instead of raising, so real errors surface instead of being swallowed.\n",
    "info = hdfs_client.status(external_fileset_path, strict=False)\n",
    "if info is not None:\n",
    "    print(f\"Success: The storage location {external_fileset_path} was successfully created.\")\n",
    "    print(\"Details:\", info)  # print HDFS path detail information\n",
    "else:\n",
    "    print(f\"Failed: The storage location {external_fileset_path} was not successfully created.\")\n",
    "\n",
    "# Create an external type of fileset\n",
    "external_fileset_ident: NameIdentifier = NameIdentifier.of(schema_name, external_fileset_name)\n",
    "catalog.as_fileset_catalog().create_fileset(ident=external_fileset_ident,\n",
    "                                            fileset_type=Fileset.Type.EXTERNAL,\n",
    "                                            comment=\"\",\n",
    "                                            storage_location=external_fileset_hdfs_path,\n",
    "                                            properties={})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "097454c8-0214-4466-ac9e-164f88929853",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the catalog, then list the filesets in the managed fileset's namespace\n",
    "catalog = gravitino_client.load_catalog(name=catalog_name)\n",
    "fileset_list: List[NameIdentifier] = catalog.as_fileset_catalog().list_filesets(\n",
    "    namespace=managed_fileset_ident.namespace()\n",
    ")\n",
    "print(fileset_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e4761eb-9533-47f0-9078-f7efdd3a01ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the managed fileset by its identifier and print it\n",
    "managed_fileset = (\n",
    "    gravitino_client.load_catalog(name=catalog_name)\n",
    "    .as_fileset_catalog()\n",
    "    .load_fileset(ident=managed_fileset_ident)\n",
    ")\n",
    "print(managed_fileset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ad95186-b16e-483a-97ef-3a4ddec49033",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the external fileset by its identifier and print it\n",
    "external_fileset = (\n",
    "    gravitino_client.load_catalog(name=catalog_name)\n",
    "    .as_fileset_catalog()\n",
    "    .load_fileset(ident=external_fileset_ident)\n",
    ")\n",
    "print(external_fileset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8164406c-808c-47f4-84da-b708dc1d3d3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop managed type of fileset; its HDFS location is expected to be deleted too\n",
    "catalog.as_fileset_catalog().drop_fileset(ident=managed_fileset_ident)\n",
    "\n",
    "# Check that the managed fileset location is gone.\n",
    "# status(strict=False) returns None only when the path does not exist, so an\n",
    "# unrelated error (e.g. HDFS unreachable) is no longer reported as \"Success\".\n",
    "info = hdfs_client.status(managed_fileset_path, strict=False)\n",
    "if info is None:\n",
    "    print(f\"Success: The storage location {managed_fileset_path} was successfully deleted.\")\n",
    "else:\n",
    "    print(f\"Failed: The storage location {managed_fileset_path} was not successfully deleted.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "741e35d2-676f-4b97-b47b-e5374168d1ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop external type of fileset; its HDFS location should NOT be deleted\n",
    "catalog.as_fileset_catalog().drop_fileset(ident=external_fileset_ident)\n",
    "\n",
    "# Check that the external fileset location is still present.\n",
    "# status(strict=False) returns None only when the path does not exist, so an\n",
    "# unrelated error (e.g. HDFS unreachable) is raised instead of being swallowed.\n",
    "info = hdfs_client.status(external_fileset_path, strict=False)\n",
    "if info is not None:\n",
    "    print(f\"Success: The storage location {external_fileset_path} reserved.\")\n",
    "else:\n",
    "    print(f\"Failed: The storage location {external_fileset_path} deleted.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db137c72-e6dd-4e07-bc09-1b18803a6e16",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop schema (cascade=True is passed so the drop is cascading)\n",
    "catalog.as_schemas().drop_schema(schema_name=schema_name, cascade=True)\n",
    "\n",
    "# Check that the schema location is gone.\n",
    "# status(strict=False) returns None only when the path does not exist, so an\n",
    "# unrelated error (e.g. HDFS unreachable) is no longer reported as \"Success\".\n",
    "info = hdfs_client.status(schema_path, strict=False)\n",
    "if info is None:\n",
    "    print(f\"Success: The storage location {schema_path} was successfully deleted.\")\n",
    "else:\n",
    "    print(f\"Failed: The storage location {schema_path} was not successfully deleted.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb4cfd65-6d8c-4b67-b8ad-7aad991888f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Force-drop the catalog and print the value returned by the client\n",
    "result = gravitino_client.drop_catalog(\n",
    "    name=catalog_name,\n",
    "    force=True,\n",
    ")\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00309040-edd0-4d48-ab36-a6c598294ed7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Force-drop the metalake and print the value returned by the admin client\n",
    "result = gravitino_admin_client.drop_metalake(\n",
    "    metalake_name,\n",
    "    force=True,\n",
    ")\n",
    "print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
